library(raster)
## Loading required package: sp
library(rgdal)
## Warning: package 'rgdal' was built under R version 3.5.2
## rgdal: version: 1.4-8, (SVN revision 845)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.4.2, released 2019/06/28
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.5/Resources/library/rgdal/gdal
## GDAL binary built with GEOS: FALSE
## Loaded PROJ.4 runtime: Rel. 5.2.0, September 15th, 2018, [PJ_VERSION: 520]
## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.5/Resources/library/rgdal/proj
## Linking to sp version: 1.3-2
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:raster':
##
## intersect, select, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(ggplot2)
library(pophelper)
## pophelper v2.3.1 ready.
library(scatterpie)
##
## Attaching package: 'scatterpie'
## The following object is masked from 'package:sp':
##
## recenter
library(ggnewscale)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
First plot CV
### parvi
parvi_CV<-read.delim("../data/genetic/output/admixture/parvi/Kerror_parviglumis.txt",
sep=" ", header=FALSE, stringsAsFactors = FALSE) %>%
mutate(K = parse_number(V3)) %>%
select(K, V4) %>% rename(CV=V4)
## Plot CV
p <- ggplot(parvi_CV, aes(x=K, y=CV)) + geom_point(size=1) + geom_line()
p + theme(axis.title.x = element_text(face="bold", size=20),
axis.text.x = element_text(angle=90, vjust=0.5, size=16),
axis.title.y = element_text(face="bold", size=20),
axis.text.y = element_text(size=16)) +
ggtitle(label = "Z. m. parviglumis")
### mexicana
mexi_CV<-read.delim("../data/genetic/output/admixture/mexicana/Kerror_mexicana.txt",
sep=" ", header=FALSE, stringsAsFactors = FALSE) %>%
mutate(K = parse_number(V3)) %>%
select(K, V4) %>% rename(CV=V4)
## Plot CV
p <- ggplot(mexi_CV, aes(x=K, y=CV)) + geom_point(size=1) + geom_line()
p + theme(axis.title.x = element_text(face="bold", size=20),
axis.text.x = element_text(angle=90, vjust=0.5, size=16),
axis.title.y = element_text(face="bold", size=20),
axis.text.y = element_text(size=16)) +
ggtitle(label = "Z. m. mexicana")
The CV error of the Admixture analysis doesn't show a clear K value, so we chose those K values where the slope changes for each species. For ** Z. m. parviglumis** this is K=13 and K=25, and for Z. m. mexicana K= 5, K=10 and K=20.
Load admixture Q data and samples metadata
# get data
parvifiles<-c("../data/genetic/output/admixture/parvi/bytaxa_parviglumis.25.Q",
"../data/genetic/output/admixture/parvi/bytaxa_parviglumis.13.Q")
parvi_list<- readQ(parvifiles)
# Check
attributes(parvi_list)
## $names
## [1] "bytaxa_parviglumis.25.Q" "bytaxa_parviglumis.13.Q"
# Get metadata
meta<-read.delim("../data/genetic/output/ADN_pasap_3604.txt")
parviPlinksamples<-read.table("../data/genetic/output/bytaxa_parviglumis.fam")
parvimeta<-meta[meta$POBL %in% parviPlinksamples$V2, ]
levels(parvimeta$ESTADO)[13]<-"EdoMex"
levels(parvimeta$ESTADO)[14]<-"Michoacan"
# add sample metadata to qlist
attributes(parvi_list[[1]])$row.names<-as.character(parvimeta$POBL)
attributes(parvi_list[[2]])$row.names<-as.character(parvimeta$POBL)
# drop unused levels
parvimeta<-droplevels(parvimeta)
Plot Qvals of both Ks of interest:
p1 <- plotQ(alignK(parvi_list[1:2]),
sortind = "all",
imgoutput="join", returnplot=T, sharedindlab=FALSE,
exportplot=F,basesize=11)
grid.arrange(p1$plot[[1]])
Plot only K=13 ordering by all genetic clusters
p1<-plotQ(parvi_list[2],
returnplot=T,
exportplot=F,
sortind = "all", basesize = 11,
# showindlab= TRUE, useindlab=TRUE, indlabangle=90, indlabsize=0.1,
exportpath = "../data/genetic/output/admixture/parvi")
plot(p1$plot[[1]])
Scatterpies of admixture groups
# Read raw Qval file
Qval<-read.table(paste0("../data/genetic/output/admixture/parvi/bytaxa_parviglumis.13.Q"))
names(Qval)<-paste0("K", 1:ncol(Qval))
# add metadata
Qval<-cbind(parvimeta$POB, parvimeta$INDIV, parvimeta$LONGITUDE, parvimeta$LATITUDE, Qval)
names(Qval)[1:4]<-c("POB", "INDIV", "LONGITUDE", "LATITUDE")
# generate same colors than the ones used for admixture plot
piecolors<-gplots::rich.colors(13)
# plot scatterpie
ppie1<- ggplot() + geom_scatterpie(data=Qval,
aes(x=LONGITUDE,
y=LATITUDE,
group=POB),
color=NA,
cols=paste0("K", 1:13)) +
scale_fill_manual(values= piecolors,
name="Genetic cluster") + theme_bw()
ppie1
Adding populations names:
# plot scatterpie
ppie1 + geom_text(data=parvimeta,
aes(x=parvimeta$LONGITUDE,
y=parvimeta$LATITUDE,
label=POB),
color="grey40",
check_overlap = T,
hjust = 0, vjust=1, nudge_x = 0.0005,
size= 5.5)
## Warning: Use of `parvimeta$LONGITUDE` is discouraged. Use `LONGITUDE` instead.
## Warning: Use of `parvimeta$LATITUDE` is discouraged. Use `LATITUDE` instead.
Plot ordering populations in geographical order
# Get latitudinal order
df<-data.frame(POB=parvimeta$POB, LONGITUDE=parvimeta$LONGITUDE) %>%
unique() %>%
arrange(LONGITUDE)
unique(df$POB)
## [1] BVPU BGUA BQUE BEJU BMAN BSAU BTAR BTAC BNOC BRED BCAR BHUE BTZI BTIQ BTUZ
## [16] BJUA BTAL BPCH BZUL BOTZ BVBR BTEJ BTEL BZAC BAPX BIXC BTCT BMAZ MALI BCOL
## [31] BHUI BMOR BOLI BOAX
## 34 Levels: BAPX BCAR BCOL BEJU BGUA BHUE BHUI BIXC BJUA BMAN BMAZ BMOR ... MALI
# set levels of Pops in latitudinal order
desired_order<-unique(df$POB)
parvimeta$POBorder<-factor(parvimeta$POB, levels = desired_order)
# change levels so that they sort in that order alphabetically
levels(parvimeta$POBorder)<-paste0(1:34, levels(parvimeta$POBorder))
levels(parvimeta$POBorder)[1:9]<-paste0("0", levels(parvimeta$POBorder)[1:9])
# set labels
labset2<-data.frame(POBorder=as.character(parvimeta$POBorder),
ESTADO=as.character(parvimeta$ESTADO), stringsAsFactors = FALSE)
#plot
p<-plotQ(parvi_list[2],
returnplot=T,
exportplot=F,
sortind = "all",
grplab=labset2,
ordergrp=TRUE,
grplabangle= 45, grplabsize=10, linesize=2, divsize = 2, grplabheight= 2, grplabpos=0.5,
exportpath = "../data/genetic/output/admixture/parvi")
plot(p$plot[[1]])
Extract in which PGD do the sampled individuals fall. This would be used to then add this information to the plot and see how well PGD correspond to genetic clusters.
First load raster data of Zea mays spp parviglumis SDM and the PGD that fell within it:
# path to rasters
my_rasters<-c("../data/spatial/modelosDarwinZea/Zea_mays_parviglumis.tif", # SDM
"../data/spatial/areasProxyDivGen/crop_to_sp_final/PDG_Zea_mays_parviglumis.tif") #SDM subdivied by Proxies of gen div
# stack rasters and add nicer name
parvi_rasters<-stack(my_rasters)
# check
names(parvi_rasters)
## [1] "Zea_mays_parviglumis" "PDG_Zea_mays_parviglumis"
# Check projection
proj4string(parvi_rasters$PDG_Zea_mays_parviglumis)
## [1] "+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0"
# change projection to longlat for nicer visualization
newcrs <- CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
parvi_rasters<-projectRaster(parvi_rasters, crs=newcrs, method="ngb")
parvi_rasters<-stack(parvi_rasters)
Z. m. parviglumis has 29 PGD distributed like this:
# Get number of cells of each PDG
table(getValues(parvi_rasters$PDG_Zea_mays_parviglumis))
##
## 0 2 5 6 7 8 10 11 15 20
## 2242616 395 8786 6753 3 9641 425 2954 5340 1025
## 21 23 25 32 36 37 38 39 40 41
## 1070 92 2237 359 7557 31658 1731 393 4462 201
## 42 43 48 58 84 86 90 92 93
## 4 8556 5913 34 1290 6 501 239 50
Plot PGD and sampling points of genetic data.
# crop raster to make it more managable
parvi_small<- raster::crop(parvi_rasters$PDG_Zea_mays_parviglumis,
y=extent(c(-107, -96 , 15.5, 22)))
# convert to df to plot with ggplot
raster_df<-as.data.frame(rasterToPoints(parvi_small, spatial = TRUE))
raster_df$PDG_Zea_mays_parviglumis<-as.character(raster_df$PDG_Zea_mays_parviglumis)
# get nice colors
mycols<-c("grey40","#917031", "#4d6cd0", "#8bbb39", "#c556c5", "#53d06a", "#895dc8", "#42a331", "#d84492", "#69b974", "#d63c48", "#4dc0b1", "#d3542b", "#4b9cd3", "#d2a337", "#9289cf", "#b1b23a", "#9a5398", "#498331", "#cc4668", "#37835d", "#df89be", "#6e7721", "#9b496e", "#b9b06d", "#ac575b", "#65743a", "#e78b7a", "#db873d", "#a2552d")
# plot PGD raster
p<- ggplot() +
geom_raster(data = raster_df , aes(x = x, y = y, fill = PDG_Zea_mays_parviglumis)) +
scale_fill_manual(values= alpha(mycols, 0.5) , name="PGD") +
theme_bw()
# add sampling points
p + geom_point(data=parvimeta, aes(x=LONGITUDE,
y=LATITUDE))
## Warning: Raster pixels are placed at uneven horizontal intervals and will be
## shifted. Consider using geom_tile() instead.
Plot scatterpies with genetic groups
p_piemap<-p + new_scale("fill") + # this is needed because raster and pie have diff fills
geom_scatterpie(data=Qval,
aes(x=LONGITUDE,
y=LATITUDE,
group=POB),
color="NA",
cols=paste0("K", 1:13)) +
scale_fill_manual(values= piecolors,
name="Genetic cluster") +
labs(x="", y= "") + theme(text = element_text(size = 25))
p_piemap
## Warning: Raster pixels are placed at uneven horizontal intervals and will be
## shifted. Consider using geom_tile() instead.
Extract in which PGD each genetic sample falls:
# Create function to estimate the mode, omitting na values
Mode <- function(x) {
ux <- unique(na.omit(x))
ux[which.max(tabulate(match(x, ux)))]
}
# transform 0 value (out of sp SDM range) to NA so that only PGD are considered
parvi_rasters$PDG_Zea_mays_parviglumis<-reclassify(parvi_rasters$PDG_Zea_mays_parviglumis,
rcl=c(0, .1, NA), include.lowest=TRUE)
# extract the PDG most frequent in a given buffer radio of the sampling point
# ignoring NA values (see Mode fun)
PGD_points<-raster::extract(parvi_rasters$PDG_Zea_mays_parviglumis,
buffer=2000, fun=Mode, # so that no points fell in only NAs
y=data.frame(long=parvimeta$LONGITUDE,
lat=parvimeta$LATITUDE)) %>%
as.character()
PGD_points<-parvimeta %>% select(LONGITUDE, LATITUDE, DNASample, POB, POBorder) %>%
mutate(PGD_points=PGD_points)
table(PGD_points$PGD_points)
##
## 10 11 15 36 37 40 41 43 48 5 6 8 84
## 37 44 12 126 685 84 42 288 37 205 123 55 5
# check if there are still NA
sum(is.na(PGD_points$PGD_points))
## [1] 26
PGD_points[is.na(PGD_points$PGD_points), ]
## LONGITUDE LATITUDE DNASample POB POBorder PGD_points
## 1078 -99.28528 16.98056 186_10 BTCT 27BTCT <NA>
## 1079 -99.28528 16.98056 186_11 BTCT 27BTCT <NA>
## 1080 -99.28528 16.98056 186_12 BTCT 27BTCT <NA>
## 1081 -99.28528 16.98056 186_13 BTCT 27BTCT <NA>
## 1082 -99.28528 16.98056 186_14 BTCT 27BTCT <NA>
## 1083 -99.28528 16.98056 186_16 BTCT 27BTCT <NA>
## 1084 -99.28528 16.98056 186_17 BTCT 27BTCT <NA>
## 1085 -99.28528 16.98056 186_18 BTCT 27BTCT <NA>
## 1086 -99.28528 16.98056 186_19 BTCT 27BTCT <NA>
## 1087 -99.28528 16.98056 186_1 BTCT 27BTCT <NA>
## 1088 -99.28528 16.98056 186_20 BTCT 27BTCT <NA>
## 1089 -99.28528 16.98056 186_21 BTCT 27BTCT <NA>
## 1090 -99.28528 16.98056 186_22 BTCT 27BTCT <NA>
## 1091 -99.28528 16.98056 186_23 BTCT 27BTCT <NA>
## 1092 -99.28528 16.98056 186_24 BTCT 27BTCT <NA>
## 1093 -99.28528 16.98056 186_25 BTCT 27BTCT <NA>
## 1094 -99.28528 16.98056 186_26 BTCT 27BTCT <NA>
## 1095 -99.28528 16.98056 186_27 BTCT 27BTCT <NA>
## 1096 -99.28528 16.98056 186_28 BTCT 27BTCT <NA>
## 1097 -99.28528 16.98056 186_29 BTCT 27BTCT <NA>
## 1098 -99.28528 16.98056 186_2 BTCT 27BTCT <NA>
## 1099 -99.28528 16.98056 186_30 BTCT 27BTCT <NA>
## 1100 -99.28528 16.98056 186_4 BTCT 27BTCT <NA>
## 1101 -99.28528 16.98056 186_5 BTCT 27BTCT <NA>
## 1102 -99.28528 16.98056 186_7 BTCT 27BTCT <NA>
## 1103 -99.28528 16.98056 186_9 BTCT 27BTCT <NA>
# assing them to closest PGD
PGD_points[is.na(PGD_points$PGD_points), ]$PGD_points <-48
table(PGD_points$PGD_points)
##
## 10 11 15 36 37 40 41 43 48 5 6 8 84
## 37 44 12 126 685 84 42 288 63 205 123 55 5
Use this information to see in which genetic group our PGD fall:
# set levels of PGD more or less in lontigitudinal order
desired_order<-c("48", "36", "10", "5", "15", "11" , "37", "6", "84", "43", "8", "40", "41")
PGD_points$PGD_order<-factor(PGD_points$PGD_points, levels = desired_order)
# change levels so that they sort in that order alphabetically
levels(PGD_points$PGD_order)<-paste0(1:13, "_",levels(PGD_points$PGD_order))
levels(PGD_points$PGD_order)[1:9]<-paste0("0", levels(PGD_points$PGD_order)[1:9])
# set labels
labsetPGD<-data.frame(PGD_points=as.character(PGD_points$PGD_order),
POBorder=as.character(PGD_points$POBorder),
stringsAsFactors = FALSE)
p3<-plotQ(parvi_list[2],
returnplot=T,
exportplot=T,
sortind = "all",
grplab=labsetPGD, selgrp = "PGD_points",
ordergrp=T,
grplabangle= 90, grplabsize=8, linesize=3, divsize = 3.5,
exportpath = getwd(), height = 20, width = 80)
## Drawing plot ...
## /Volumes/datos/USUARIOS/DarwinUICN/Zonificacion/Zonas_de_vida/analisisUniCons_proxiGen/bin/bytaxa_parviglumis.13.Q.png exported.
plot(p3$plot[[1]])
Omit labels for plot for the paper: